# -*- coding: utf-8 -*-
# @Date    : 2021/5/15
# @Author  : Maoxian


# 作业内容：
# 爬取jd上关于无人机的10页数据，数据包含商品名称，价格，店铺名称


# 分析：
# 搜索无人机后，一页有30条数据，向下滚动会出现剩下的30条，共60条数据
# 滚动或者点击下一页，一页60条数据对应2个page，点击下一页page会+2
# 分析url为：https://search.jd.com/s_new.php?keyword=&qrst=1&stock=1&page=

import re
import time

import requests
from lxml import etree

# One shared session for every request, so headers set on it (the User-Agent
# below and the Referer set by the main loop) are sent with each page fetch.
rs = requests.Session()
# Update the session's header mapping in place rather than replacing it with
# a plain dict: replacing it would discard requests' default headers and the
# case-insensitive key lookup that Session.headers normally provides.
rs.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
})


# Matches one keyword-highlight wrapper and captures the text inside it.
_HIGHLIGHT_RE = re.compile(r'<font class="skcolor_ljg">(.*?)</font>', re.DOTALL)


def _first_text(nodes, default=''):
    """Return the first XPath result in *nodes*, or *default* if none matched."""
    return nodes[0] if nodes else default


def get_data(url, timeout=10):
    """Fetch one search-result page and print every product found on it.

    Each product is printed as a dict with the keys ``name``, ``price`` and
    ``shop``. A field that cannot be located in a product card is reported
    as an empty string instead of aborting the whole page.

    Args:
        url: Address of the search-result page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl indefinitely.

    Returns:
        The final URL of the response (also printed), which the caller uses
        as the Referer header of the next request.
    """
    res = rs.get(url, timeout=timeout)

    # Highlighted keyword fragments are wrapped in <font class="skcolor_ljg">,
    # which splits a product name into several text nodes. Unwrap every such
    # tag, whatever fragment it holds, so the name is a single text node and
    # the cleanup is not tied to one particular search keyword.
    text = _HIGHLIGHT_RE.sub(r'\1', res.text)
    text = text.replace('\t', '').replace('\n', '')

    # Skip parsing when there is nothing to parse; also tolerate a parser
    # that hands back no tree at all.
    tree = etree.HTML(text) if text.strip() else None
    items = tree.xpath('//div[@class="gl-i-wrap"]') if tree is not None else []

    for item in items:
        data = dict(
            name=_first_text(
                item.xpath('./div[@class="p-name p-name-type-2"]/a/em/text()')),
            price=_first_text(
                item.xpath('./div[@class="p-price"]/strong/i/text()')),
            shop=_first_text(
                item.xpath('./div[@class="p-shop"]/span/a[1]/text()')),
        )
        print(data)

    referer = res.url
    print(referer)
    return referer


def main():
    """Request search pages 1 through 10 for the keyword, one per second.

    The first request goes to the regular Search endpoint; every later one
    goes to the s_new.php endpoint with scrolling enabled. After each fetch
    the URL returned by get_data() is stored on the session as the Referer
    header for the following request.
    """
    keyword = '无人机'
    for i in range(1, 11):
        url = (
            f'https://search.jd.com/Search?keyword={keyword}&enc=utf-8&qrst=1&stock=1&page={i}&s=1&click=0'
            if i == 1 else
            f'https://search.jd.com/s_new.php?keyword={keyword}&qrst=1&stock=1&page={i}&scrolling=y'
        )

        # The page just fetched becomes the Referer of the next request.
        referer = get_data(url)
        rs.headers['referer'] = referer

        # Pause between requests.
        time.sleep(1)


if __name__ == '__main__':
    main()
